In [None]:
# Functions defined here will be available to call in
# the code for any table.
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
#
# Foundry TS libraries
from foundryts import FoundryTS,Interval,NodeCollection
import foundryts.functions as F
import foundryts.nodes as N
# FoundryTS client handle. None of the functions visible in this part of the
# notebook reference it - presumably used by the data-grabbing step (TODO confirm).
fts = FoundryTS()
#
#####################################
# Injection well correction-o-meter #
#####################################
#
# Derives linear coefficient fits for well data over a riser
# to help identify where chokes are being eroded / calibration
# is going awry
#
# Contact: D Robbins
#
#
#################################
# Options - fill out as desired #
#################################
#
# data grabber options
#
# my_riser must be a key of dict_riser_to_well (below); it is also used as the
# prefix of the '<riser>_RiserRate' input column.
# Legend from the original author: R180 = central, R182 = N/L, R185 = W/FW.
# NOTE(review): those codes are not keys of dict_riser_to_well as defined here.
my_riser = 'riser_1'                   # riser to look at
# NOTE(review): the start/end dates and the interpolation interval are not read
# by any function visible in this part of the notebook - presumably consumed
# by the data-grabbing step; confirm.
my_start_date = '2019/02/17 00:00:00'  # start time
my_end_date = '2019/02/18 00:00:00'  # end time
#my_end_date = 'NOW'    # end time (use "NOW" for present datetime)
#
my_interp_seconds = 60                # interpolation interval in seconds (original note recommends 300 s, i.e. 5 min)
#
# Wells whose reported rate is believed: their rate is subtracted from the
# riser rate before the regression and their coefficient is fixed at 1.
# If you want to force a linear coefficient to 1, add the well here.
my_believed_well_list = ['I01','I02']   # original note: "CW15 is fishy" (NOTE(review): CW15 is not a well in dict_riser_to_well)
# Wells left out of the regression with their coefficient hard forced to zero.
my_ignored_well_list = ['I03']
#
# more clever options
ignore_zero_periods = 1             # if 1, drop samples where (riser rate - believed well rates) is not above 1.0, i.e. nothing left to fit (recommended)
assume_intercept = False            # if True, assume correction is a*Q + b (rather than straight linear a) - not recommended
split_well_rates = 0                # if 1, split well rates into sub-categories (to try to better define correction across rates; 0 = just use linear correction). NOTE(review): not read by any function visible here
Lasso_alpha = 1.0                   # multiplier on the L1 regulariser term for Lasso regression (recommend 1.0; don't use anything at 0 or negative)
pick_coeffs = 0                     # if 0, use whichever model has the higher R2 score; 1 = unregularised linear regression; any other value = Lasso model (recommend 0)
#
##############################
# Dictionaries - leave alone #
##############################
#
# riser to well dictionary: riser name -> list of well IDs fed by that riser.
# The well IDs are used as column-name prefixes
# ('<well>_rate_preferred', '<well>_IWV', '<well>_IMV', '<well>_rate').
dict_riser_to_well = {
    'riser_1':['I01', 'I02', 'I03'],
}

In [None]:
def well_data_scrubber(data_grabber):
    """Build a per-well rate table with offline periods forced to zero.

    For every well on the riser selected by the module-level ``my_riser``
    (looked up in ``dict_riser_to_well``), the ``<well>_rate_preferred``
    reading is kept only where it is non-negative AND both the ``<well>_IWV``
    and ``<well>_IMV`` readings are negative; everywhere else the rate is
    replaced by 0.0. Missing values are filled with -1.0 before the test, so a
    missing rate becomes 0.0 and a missing valve reading counts as negative.

    Parameters
    ----------
    data_grabber : pandas.DataFrame
        Must contain ``time``, ``<my_riser>_RiserRate`` and, for each well,
        ``<well>_rate_preferred``, ``<well>_IWV`` and ``<well>_IMV``.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``time``, ``riser_rate`` and one ``<well>_rate`` column per
        well, joined on ``time``.
    """
    # work on a private copy so the caller's frame is never touched
    df = data_grabber.copy(deep=True)

    # list of wells that we have on this riser
    wells_wanted = dict_riser_to_well[my_riser]

    # start the output from the riser rate; explicit copy so renaming the
    # columns cannot act on a view of df
    df_output = df[['time', my_riser+'_RiserRate']].copy()
    df_output.columns = ['time', 'riser_rate']

    for well in wells_wanted:
        # build column list
        col_rate = well+'_rate_preferred'
        col_IWV = well+'_IWV'
        col_IMV = well+'_IMV'
        col_rate_corrected = well+'_rate'

        # grab the well's columns with NaN replaced by -1.0 (fillna returns a
        # new frame, so no in-place edit of a slice is needed)
        df_well = df[['time', col_rate, col_IWV, col_IMV]].fillna(-1.0)

        # Vectorised form of the former row-by-row loop: keep the reported
        # rate only where it is not negative and both valve readings are
        # negative (the condition the original code labels "valves are both
        # open"); otherwise the well is treated as offline.
        is_flowing = (
            ~(df_well[col_rate] < 0)
            & (df_well[col_IWV] < 0)
            & (df_well[col_IMV] < 0)
        )
        df_well[col_rate_corrected] = np.where(is_flowing, df_well[col_rate], 0.0)

        # merge the corrected rate column into the output on the time key
        df_output = pd.merge(df_output, df_well[['time', col_rate_corrected]], on='time')

    return df_output


In [None]:
def linear_regressor(well_data_scrubber):
    """Fit per-well correction coefficients against the riser rate.

    The regression target is the riser rate minus the rates of every
    "believed" well; the features are the rates of the wells that are neither
    believed nor ignored. Two models are fitted on the same data - ordinary
    least squares and a Lasso constrained to positive coefficients - and one
    of them is selected according to ``pick_coeffs``. Believed wells then get
    a coefficient of 1.0, ignored wells 0.0 and the remaining wells their
    fitted coefficient. A short report is printed.

    Reads the module-level options ``my_riser``, ``dict_riser_to_well``,
    ``my_believed_well_list``, ``my_ignored_well_list``,
    ``ignore_zero_periods``, ``assume_intercept``, ``Lasso_alpha`` and
    ``pick_coeffs``.

    Parameters
    ----------
    well_data_scrubber : pandas.DataFrame
        Needs ``time``, ``riser_rate`` and one ``<well>_rate`` column for
        every well on the selected riser. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``time``, ``riser_rate``, ``well_sum_raw`` (plain sum of all
        well rates) and ``well_sum_corrected`` (sum of well rates weighted by
        the coefficients).

    Raises
    ------
    ValueError
        If every well is believed or ignored (nothing to fit), or if no
        sample is left to fit on.
    """
    df = well_data_scrubber.copy(deep=True)
    my_well_list = dict_riser_to_well[my_riser]

    # split the wells into 'truth' wells (coefficient fixed to 1) and 'lie'
    # wells (coefficient to be fitted); ignored wells take part in neither
    my_truth_list = []
    my_lie_list = []
    for well in my_well_list:
        if well in my_believed_well_list:
            my_truth_list.append(well+'_rate')
        elif well in my_ignored_well_list:
            continue
        else:
            my_lie_list.append(well+'_rate')

    if len(my_lie_list) == 0:
        # raise instead of calling quit(): quit() is a site/IPython helper
        # whose effect depends on the environment
        raise ValueError('List of "untrustworthy wells" is zero - no point regressing on this.')

    # target to train on: riser rate minus the trustworthy wells, subtracted
    # one column at a time (each step builds a new Series, df is untouched)
    residual = df['riser_rate']
    for truth_col in my_truth_list:
        residual = residual - df[truth_col]
    riser_rates = residual.to_numpy(dtype=float)

    # feature matrix, shape (n_times, n_fitted_wells)
    well_rates = df[my_lie_list].to_numpy(dtype=float)

    # ignore any zero-rate periods if requested
    if ignore_zero_periods == 1:
        keep = riser_rates > 1.0
        riser_rates = riser_rates[keep]
        well_rates = well_rates[keep]

    if riser_rates.shape[0] == 0:
        raise ValueError('No riser injection over period selecting - no point regressing on this.')

    # Use the (possibly filtered) arrays directly. The previous code read
    # new_riser_rates / new_well_rates here, which only exist when
    # ignore_zero_periods == 1 and raised NameError otherwise.
    riser_rates = riser_rates.reshape(riser_rates.shape[0], 1)

    # PERFORM LINEAR REGRESSION
    #
    # firstly, a simple linear regression model
    well_regressor_1 = LinearRegression(fit_intercept=assume_intercept).fit(well_rates, riser_rates)
    R2_lin1 = well_regressor_1.score(well_rates, riser_rates)
    # flatten so the coefficients index as a 1-D vector whatever shape the
    # estimator reports for a single-column target
    linear_coeffs_1 = np.ravel(well_regressor_1.coef_)
    #
    # now the Lasso model, honouring the Lasso_alpha option (previously a
    # hard-coded alpha=1.0 made the option a no-op)
    well_regressor_2 = Lasso(fit_intercept=assume_intercept, alpha=Lasso_alpha, positive=True).fit(well_rates, riser_rates)
    R2_lin2 = well_regressor_2.score(well_rates, riser_rates)
    linear_coeffs_2 = np.ravel(well_regressor_2.coef_)

    # pick a linear method: 0 = best R2 score, 1 = simple linear regression,
    # anything else = Lasso
    use_least_squares = (pick_coeffs == 1) or (pick_coeffs == 0 and R2_lin1 > R2_lin2)
    if use_least_squares:
        model_name = 'ordinary least squares'
        model_R2 = R2_lin1
        linear_coeffs = linear_coeffs_1
    else:
        model_name = 'Lasso'
        model_R2 = R2_lin2
        linear_coeffs = linear_coeffs_2

    # build one coefficient per well, in my_well_list order; fitted
    # coefficients are consumed in the order my_lie_list was built
    fitted_coeffs = iter(linear_coeffs)
    well_coeffs = []
    for well in my_well_list:
        if well in my_believed_well_list:
            well_coeffs.append(1.0)
        elif well in my_ignored_well_list:
            well_coeffs.append(0.0)
        else:
            well_coeffs.append(float(next(fitted_coeffs)))

    # raw and coefficient-weighted sums over all wells, row by row
    well_col_list = [well+'_rate' for well in my_well_list]
    all_well_rates = df[well_col_list].to_numpy(dtype=float)

    # explicit copy so adding columns never acts on a slice of df
    df_return = df[['time', 'riser_rate']].copy()
    df_return['well_sum_raw'] = all_well_rates.sum(axis=1)
    df_return['well_sum_corrected'] = all_well_rates @ np.array(well_coeffs, dtype=float)

    # print outputs
    print('Model used: ' + model_name)
    print('R2 score: %.4f' % model_R2)

    print('')
    print('Well coefficients estimated:')
    for well, well_coeff in zip(my_well_list, well_coeffs):
        # a coefficient above 1 means the well reports less than it delivers
        if well_coeff > 1.02:
            verdict = 'under-estimates'
        elif well_coeff > 0.98:
            verdict = 'OK'
        else:
            verdict = 'over-estimates'
        print('%s coefficient: %.2f - %s' % (well, well_coeff, verdict))

    return df_return


In [None]:
def well_correction_plotter(linear_regressor):
    """Scatter the riser rate against the raw and corrected well-rate sums.

    Expects a frame with ``time``, ``riser_rate``, ``well_sum_raw`` and
    ``well_sum_corrected`` columns. Draws one 10x5 figure with the y-axis
    fixed to 0-400 and shows it; nothing is returned.
    """
    plot_df = linear_regressor.copy(deep=True)
    timestamps = plot_df['time']

    plt.subplots(figsize=(10,5))

    # (column, legend label, marker size, colour) - the riser is drawn first
    # with large black markers so the smaller well-sum markers sit on top
    series_specs = (
        ('riser_rate', 'Riser rate', 30, 'black'),
        ('well_sum_raw', 'Well sum (raw)', 5, 'red'),
        ('well_sum_corrected', 'Well sum (corrected)', 5, 'green'),
    )
    for column, legend_text, marker_size, colour in series_specs:
        plt.scatter(timestamps, plot_df[column], label=legend_text, s=marker_size, c=colour)

    plt.title('Comparison of corrected and raw rates')
    plt.ylabel('Rate (m3/hr)')

    # x-axis spans exactly the first to last timestamp present
    time_values = timestamps.tolist()
    plt.xlim([min(time_values), max(time_values)])
    plt.ylim(0,400)

    plt.legend()
    plt.show()

In [None]:
def online_wells_plot(well_data_scrubber):
    """Plot, per well, the times at which the well has a positive rate.

    Each well on the riser selected by ``my_riser`` gets its own row on the
    y-axis (row i+1 for the i-th well in ``dict_riser_to_well[my_riser]``).
    A sample is plotted on the well's row when ``<well>_rate`` is above zero
    and at y = 0 otherwise; the y-limits start at 0.5, so the y = 0 points
    fall outside the visible range. Shows the figure; nothing is returned.

    Parameters
    ----------
    well_data_scrubber : pandas.DataFrame
        Needs ``time`` and one ``<well>_rate`` column per well.
    """
    df = well_data_scrubber.copy(deep=True)

    # well list
    my_well_list = dict_riser_to_well[my_riser]

    fig, ax = plt.subplots()

    for row_number, well in enumerate(my_well_list, start=1):
        # row number where the well is flowing, 0.0 where it is not
        online_status = np.where(df[well+'_rate'] > 0.0, row_number, 0.0)
        # label each series with the well's own ID (previously every series
        # carried the literal string 'well')
        ax.scatter(df['time'], online_status, label=well)

    # one tick per well, labelled with the well ID
    ax.set_yticks(list(range(1, len(my_well_list)+1)))
    ax.set_yticklabels(my_well_list)

    time_values = df['time'].tolist()
    ax.set_xlim([min(time_values), max(time_values)])
    ax.set_ylim([0.5, len(my_well_list)+0.5])

    ax.set_title('Well online status')

    # show the plot
    plt.show()

In [None]:
def processed_data_plotter(well_data_scrubber):
    """Plot the riser rate and every well rate against time on twin y-axes.

    The riser rate is drawn in black on the left-hand axis; each well on the
    riser selected by ``my_riser`` is drawn on a second y-axis sharing the
    same x-axis. Expects ``time``, ``riser_rate`` and one ``<well>_rate``
    column per well. Shows a 10x10 figure; nothing is returned.
    """
    # private copy of the incoming frame
    rates = well_data_scrubber.copy(deep=True)
    timestamps = rates['time']

    figure, riser_axis = plt.subplots(figsize=(10,10))

    # riser on the primary axis
    riser_axis.plot(timestamps, rates['riser_rate'], label='Riser', color='black')

    # wells on a secondary axis sharing the time axis
    well_axis = riser_axis.twinx()
    for well_name in dict_riser_to_well[my_riser]:
        well_axis.plot(timestamps, rates[well_name+'_rate'], label=well_name)

    riser_axis.set_ylabel('Riser rate (m3/hr)')
    well_axis.set_ylabel('Well rate (m3/hr)')

    # figure-level legend so lines from both axes are listed together
    figure.legend()

    # title goes on whichever axes pyplot currently treats as active
    plt.title('Riser and well rates')

    plt.show()