## Start

In [1]:
#Import libs 
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import statistics as st
import math
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from tqdm import tqdm
import textwrap
import seaborn as sns
from statistics import mean
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score as r2, mean_absolute_error as mae, mean_squared_error as mse, accuracy_score
from sklearn.metrics import make_scorer
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.inspection import permutation_importance
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import Pipeline
from scipy.stats import gmean
from xgboost import XGBRegressor
from scipy import stats
from datetime import datetime
from pyproj import Proj, transform
import logging
import random
import mlflow
import mlflow.sklearn
# Notebook-wide pandas display settings.
pd.options.display.precision = 3
# Floats are rendered through this formatter with five decimals.
pd.options.display.float_format = lambda value: '%.5f' % value
pd.options.display.max_columns = 15

### Load main data

In [298]:
# Read every raw input table used by the notebook.
# NOTE: all locations are absolute, machine-specific Windows paths.
path = 'C:\\jupyter\\SPP\\input\\'

# Wide oil-rate table; 'Date' is appended as the LAST column, which
# prod_data_processing relies on when it slices data.columns[1:-1].
oil_prod = pd.read_csv(path + 'ACG_prod_oil_data_20231201.csv')
oil_prod['Date'] = pd.to_datetime(oil_prod['Timestamp'])

# Wide water-rate table. Its dates must come from its OWN 'Timestamp' column:
# the previous code parsed oil_prod['Timestamp'] here, which mislabels or
# blanks water dates whenever the two files differ in rows or ordering.
wat_prod = pd.read_csv(path + 'ACG_prod_water_data_20231201.csv')
wat_prod['Date'] = pd.to_datetime(wat_prod['Timestamp'])

# [1:] drops the first data row of the completion table
# (presumably a units/description row -- TODO confirm against the file).
perf_init = pd.read_csv(path + 'ACG_prod_completion.csv')[1:]
pta_pi_init = pd.read_csv(path + 'ACG_prod_pta_pi.csv')

# Both files carry a saved index column ('Unnamed: 0') that is discarded.
kh_dist0_cut = pd.read_csv(r'C:\jupyter\SPP\inputoutput\avg_prop_tst_kh_dist0_cut.csv').drop('Unnamed: 0', axis=1)
prod_welldata = pd.read_excel(r'C:\jupyter\SPP\input\ACG_prod_welldata.xlsx').drop('Unnamed: 0',axis=1)

In [None]:
# Keep wells with a known formation, then narrow down to the Bal8 / Bal10 rows.
prod_welldata = prod_welldata.dropna(subset=['Formation name'])
prod_welldata_bal = (
    prod_welldata.loc[prod_welldata['Formation name'].isin(['Bal8', 'Bal10'])]
    .rename(columns={'Well Name name': 'well', 'Formation name': 'FORMATION_up'})
)

# The raw well label is split on '_' into exactly two parts; only the first
# part (the well name) is kept, the second part is discarded.
well_label_parts = prod_welldata_bal['well'].str.split('_', expand=True)
prod_welldata_bal[['well_v2', 'fm']] = well_label_parts
prod_welldata_bal = (
    prod_welldata_bal.drop(columns=['well', 'fm'])
    .rename(columns={'well_v2': 'well'})
    .loc[:, ['well', 'FORMATION_up', 'Top of Screen mTVD SCS', 'Mid-Screen mTVD SCS', 'Bottom of Screen mTVD SCS']]
    .dropna(subset=['Mid-Screen mTVD SCS'])
)

# Expand the short formation codes to the names used by the other tables.
prod_welldata_bal['FORMATION_up'] = prod_welldata_bal['FORMATION_up'].replace(
    {'Bal8': 'Balakhany VIII', 'Bal10': 'Balakhany X'}
)
prod_welldata_bal.head(3)

In [82]:
def prod_data_processing(oil_data=None, wat_data=None):
    """Combine the wide oil and water production tables into one long table.

    Parameters
    ----------
    oil_data, wat_data : pandas.DataFrame, optional
        Wide tables whose first column is skipped, whose last column is
        'Date', and whose remaining columns hold one production series each.
        When omitted, the notebook-level ``oil_prod`` / ``wat_prod`` frames
        are used, so the original no-argument call keeps working.

    Returns
    -------
    pandas.DataFrame
        Columns ``well_fm``, ``date``, ``oil``, ``water``, ``liquid``.
        ``oil`` and ``water`` are absolute values and ``liquid`` is their sum.
        Water is left-joined onto oil by (shortened well name, date).
    """
    if oil_data is None:
        oil_data = oil_prod
    if wat_data is None:
        wat_data = wat_prod

    def prod_data_transformation(data, var):
        """Reshape a wide table into long rows of (well, <var>, date)."""
        # Same positional convention as before: skip the first column and the
        # trailing 'Date' column, treat everything in between as a well series.
        well_columns = list(data.columns[1:-1])
        long_df = data.melt(id_vars=['Date'], value_vars=well_columns,
                            var_name='well', value_name=var)
        return long_df.rename(columns={'Date': 'date'})[['well', var, 'date']]

    def wellname_rename(data):
        """Add 'new_well': the column label truncated at its first space."""
        renamed = data.reset_index(drop=True)
        # Vectorised; also defines the column when the table has no rows.
        renamed['new_well'] = renamed['well'].str.split(' ').str[0]
        return renamed

    oil_prod_df_rn = wellname_rename(prod_data_transformation(oil_data, 'oil'))
    wat_prod_df_rn = wellname_rename(prod_data_transformation(wat_data, 'water'))

    join_keys = ['new_well', 'date']
    oil_wat_prod = (
        oil_prod_df_rn.drop(columns='well').set_index(join_keys)
        .join(wat_prod_df_rn.drop(columns='well').set_index(join_keys))
        .reset_index()
    )
    # Work with magnitudes regardless of the sign stored in the inputs.
    oil_wat_prod['oil'] = oil_wat_prod['oil'].abs()
    oil_wat_prod['water'] = oil_wat_prod['water'].abs()
    oil_wat_prod['liquid'] = oil_wat_prod['oil'] + oil_wat_prod['water']
    return oil_wat_prod.rename(columns={'new_well': 'well_fm'})
# Build the long production table, then split the combined 'well_fm' label
# on '_' into separate 'well' and 'fm' columns.
oil_wat_prod = prod_data_processing()
well_fm_parts = oil_wat_prod['well_fm'].str.split('_', expand=True)
oil_wat_prod[['well', 'fm']] = well_fm_parts
# Selecting the final column order also leaves 'well_fm' behind.
oil_wat_prod = oil_wat_prod.loc[:, ['well', 'fm', 'date', 'oil', 'water', 'liquid']]

In [202]:
# Perforation records restricted to the Balakhany VIII and Balakhany X formations.
perf_init = perf_init.rename(columns={'wellName':'well'})
perf = perf_init[['well', 'FORMATION', 'DEPTH', 'TVD_SCS', 'TST', 'PERF', 'REMARK']]
perf = perf[perf.FORMATION.notna()]

# Match either formation with a regex alternation. The earlier call
# str.contains('Balakhany VIII', 'Balakhany X') passed the second string as the
# `case` argument, so only Balakhany VIII rows survived the filter.
# .copy() makes the column assignments below act on an independent frame.
perf_bal = perf[perf.FORMATION.str.contains('Balakhany VIII|Balakhany X')].copy()

# Normalised formation label; a row matching both patterns ends up as 'Balakhany X'
# because that assignment runs last.
perf_bal.loc[perf_bal.FORMATION.str.contains('Balakhany VIII'), 'FORMATION_up'] = 'Balakhany VIII'
perf_bal.loc[perf_bal.FORMATION.str.contains('Balakhany X'), 'FORMATION_up'] = 'Balakhany X'

perf_bal['PERF'] = perf_bal.PERF.astype('float')
perf_bal_perf = perf_bal[(perf_bal.PERF.notna())].copy()
# -9999 is replaced by 0 (presumably a "no value" sentinel -- TODO confirm).
perf_bal_perf['PERF'] = perf_bal_perf['PERF'].replace(-9999, 0)

In [None]:
# Pressure-transient / productivity-index table: keep rows with a PI value and
# normalise well names so they match the other tables.
pta_pi_init = pta_pi_init.rename(columns={'drill_name': 'well'})
pta_pi_columns = [
    'well', 'date_st', 'qoil_m3', 'qgas_m3', 'qwtr_m3', 'si_duration_hrs',
    'skin', 'perm_md', 'kh_mdm', 'pi_m3_barg', 'ref_depth_m', 'well_type', 'interval_name',
]
pta_pi = pta_pi_init[pta_pi_columns].dropna(subset=['pi_m3_barg'])

# The well label is split on '_' into exactly two parts: well name and a suffix.
pta_pi[['well_v2', 'fm']] = pta_pi['well'].str.split('_', expand=True)

# Exact-match aliases: alternative spellings on the left, canonical name on the right.
well_name_aliases = {
    'C20Z(WI)': 'C20Z',
    'SDC01': 'C01',
    'SDC02': 'C02',
    'SDC03Z': 'C03Z',
    'SDC04Z': 'C04Z',
    'SDD02': 'D02',
    'SDD03': 'D03',
    'SDD04Y': 'D04Y',
    'SDG04': 'G04',
    'SDG03': 'G03',
    'SDC05Z': 'C05Z',
    'SDG01': 'G01',
    'SDG02Z': 'G02Z',
}
pta_pi['well_v2'] = pta_pi['well_v2'].replace(well_name_aliases)

# Slim frame used for the later joins, keyed by the normalised well name.
pta_pi_join = (
    pta_pi[['well_v2', 'pi_m3_barg', 'kh_mdm', 'ref_depth_m', 'date_st']]
    .rename(columns={'well_v2': 'well'})
)
pta_pi_join.head(3)

In [None]:
# Keep only the columns needed for the join with the PTA table.
kh_columns = ['well', 'FORMATION_up', 'KHtst', 'field']
kh_dist0_cut = kh_dist0_cut.loc[:, kh_columns]
kh_dist0_cut.head(3)

In [357]:
# Step 1: attach screen depths per well; drop tests whose well has no mid-screen depth.
pta_pi_join_v2 = (
    pta_pi_join.set_index('well')
    .join(prod_welldata_bal.set_index('well'))
    .reset_index()
    .dropna(subset=['Mid-Screen mTVD SCS'])
)

# Step 2: attach KHtst / field by (well, formation), parse the test date,
# order by well and date, and drop rows without a KHtst value.
well_formation_key = ['well', 'FORMATION_up']
pta_pi_join_v3 = (
    pta_pi_join_v2.set_index(well_formation_key)
    .join(kh_dist0_cut.set_index(well_formation_key))
    .reset_index()
)
pta_pi_join_v3['date_st'] = pd.to_datetime(pta_pi_join_v3['date_st'])
pta_pi_join_v3 = pta_pi_join_v3.sort_values(['well', 'date_st']).dropna(subset=['KHtst'])

# Step 3: one row per (well, formation). Rows are already date-ordered, so the
# positional first element is the first row of each group in that order.
take_first_row = lambda values: values.iloc[0]
pta_pi_join_v4 = (
    pta_pi_join_v3.groupby(['well', 'FORMATION_up'])
    .agg({
        'pi_m3_barg': take_first_row,
        'kh_mdm': 'mean',
        'KHtst': 'mean',
        'date_st': take_first_row,
        'field': take_first_row,
    })
    .reset_index()
)

# Per-formation views of the aggregated table.
pta_pi_join_v4_bal8 = pta_pi_join_v4.loc[pta_pi_join_v4.FORMATION_up == 'Balakhany VIII']
pta_pi_join_v4_bal10 = pta_pi_join_v4.loc[pta_pi_join_v4.FORMATION_up == 'Balakhany X']

In [None]:
# Scatter of PI against KHtst, coloured by field and with one symbol per formation.
hover_columns = ['pi_m3_barg', 'KHtst', 'FORMATION_up', 'well', 'field', 'date_st']
fig = px.scatter(
    pta_pi_join_v4,
    x='pi_m3_barg',
    y='KHtst',
    color='field',
    symbol='FORMATION_up',
    hover_data=dict.fromkeys(hover_columns, True),
    height=500,
    width=800,
)
marker_style = dict(size=10, line=dict(width=1, color='gray'))
# Last expression of the cell: the returned figure is what gets displayed.
fig.update_traces(marker=marker_style, selector=dict(mode='markers'))

In [None]:
# One figure per well: oil (green) and water (blue) rates against time.
for well in oil_wat_prod.well.unique()[:]:
    # Rates are rounded to whole numbers for plotting only.
    well_rates = oil_wat_prod[oil_wat_prod.well == well].round({'oil': 0, 'water': 0})

    fig = go.Figure()
    for rate_column, marker_color in (('oil', 'green'), ('water', 'blue')):
        fig.add_trace(
            go.Scatter(
                x=well_rates.date,
                y=well_rates[rate_column],
                mode='markers',
                marker=dict(
                    color=marker_color, size=7, opacity=0.75,
                    line=dict(color='rgb(47, 57, 61)', width=0.5),
                ),
            )
        )

    fig.update_xaxes(title_text='time')
    fig.update_yaxes(title_text='prod')
    fig.update_layout(
        title_text=well,
        width=1300,
        height=400,
        margin=dict(l=10, r=10, b=10, t=50),
        showlegend=False,
    )
    fig.show()