In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
import seaborn as sns
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import PolynomialFeatures, OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split, ParameterSampler, ParameterGrid
from skgstat import Variogram, DirectionalVariogram, OrdinaryKriging
from skgstat.util.cross_validation import jacknife
from pykrige.uk import UniversalKriging
from pykrige.rk import RegressionKriging
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import dill as pickle
import pprint, zipfile, os, warnings
warnings.filterwarnings('ignore')
plt.style.use('ggplot')
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:,.2f}'.format)

with zipfile.ZipFile('../data/merged_oilgas_prodwells.csv.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipf.write('../data/merged_oilgas_prodwells.csv', arcname='merged_oilgas_prodwells.csv')

df = pd.read_csv('../data/merged_oilgas_prodwells.csv', low_memory=False)
df_saved = df.copy()

print(df['GasProd'].describe())
print('------------------------')
print(df.info())

fig, ax = plt.subplots(1, figsize=(8,5))
corr_plot = ax.scatter(np.array(df['Bottom_hole_longitude']), np.array(df['Bottom_hole_latitude']), 
                       s=50, c=np.array(df['GasProd']), cmap='plasma', alpha=0.7)
plt.colorbar(corr_plot).set_label('Gas Production')
plt.title('Overview of Gas Production Log1p')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.show()
plt.close()

df.head()

FileNotFoundError: [Errno 2] No such file or directory: '../data/merged_oilgas_prodwells.csv.zip'

In [None]:
df = df_saved.copy()
coordinates = np.array(df[['longitude','latitude']].values)
target = np.array(df['gas_prod'].values)

poly = PolynomialFeatures(degree=3, include_bias=False)
coordinates_poly = poly.fit_transform(coordinates)

reg = LinearRegression().fit(coordinates_poly, target)
trend = reg.predict(coordinates_poly)
detrended_gas = target - trend

df_detrended = df_saved.copy()
df_detrended['gas_prod'] = detrended_gas
print(df_detrended.describe())

df_detrended = df_detrended[['gas_prod','longitude','latitude','depth','elevation',
              'well','status','field','geology']]
df_detrended.head()

df = df_detrended.copy()
coordinates = np.array(df[['longitude','latitude']].values)
target = np.array(df['gas_prod'].values)

detrend_variogram = DirectionalVariogram(
    coordinates=coordinates, 
    values=target)
pprint.pprint(detrend_variogram.describe())

detrend_variogram.plot()
plt.title('Variogram of Gas After Detrending')

fig, ax = plt.subplots(1, figsize=(8,5))
corr_plot = ax.scatter(df['longitude'], df['latitude'], 
                       s=50, c=df['gas_prod'], cmap='plasma', alpha=0.7)
plt.colorbar(corr_plot).set_label('Detrended Gas Production (Barrels)')
plt.title('Flattened Overview of Detrended Gas Production')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.show()
plt.close()

df = df_detrended.copy()
target = df['gas_prod']

numerical = ['gas_prod','longitude','latitude']
imperial = ['depth','elevation']
categorical = ['well','status','field','geology']


preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical),
        ('imp', StandardScaler(), imperial),
        ('cat', OneHotEncoder(sparse_output=False), categorical)])

df_transformed = preprocessor.fit_transform(df)
df_transformed = pd.DataFrame(df_transformed, columns=preprocessor.get_feature_names_out())
df_transformed.to_csv('../data/encoded_gaswells.csv', index=False)
print(df_transformed[['num__gas_prod','num__longitude','num__latitude',
                      'imp__depth','imp__elevation']].describe())
df_transformed

df = pd.read_csv('../data/encoded_gaswells.csv')
print(f'Data Shape: {df.shape}')

coords = np.array(df[['num__longitude', 'num__latitude']].values)
vals = np.array(df['num__gas_prod'].values)

x = coords[:, 0]
y = coords[:, 1]
xx, yy = np.mgrid[x.min():x.max():100j, y.min():y.max():100j]

x_ = (x - x.min()) / (x.max() - x.min()) * 100
y_ = (y - y.min()) / (y.max() - y.min()) * 100

print(f'\
\nSplit Shapes\n------------\n\
coords:   {coords.shape}\n\
target:   {vals.shape}\n\
gridx:    {xx.shape}\n\
gridy:    {yy.shape}')

V = DirectionalVariogram(
    coordinates=coords,
    values=vals.flatten())

V.plot()
plt.title('Variogram Reflecting Spatial Correlation of Natural Gas')
plt.savefig('../images/base_variogram.png')

kriging = OrdinaryKriging(V)
field = kriging.transform(xx.flatten(), yy.flatten()).reshape(xx.shape)
s2 = kriging.sigma.reshape(xx.shape)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,5.5))
gas_interp = ax1.matshow(field.T, origin='lower', cmap='plasma', 
                   interpolation='bilinear',
                   vmin=vals.min(), vmax=vals.max(),
                   extent=[x.min(), x.max(), y.min(), y.max()])
ax1.set_title('Interpolation', fontweight='bold', fontsize=16)
ax1.plot(x, y, '+w')
ax1.set_xlabel('LONGITUDE (°)', color='black', fontweight='bold', fontsize=14)
ax1.set_ylabel('LATITUDE (°)', color='black', fontweight='bold', fontsize=14)
ax1.set_xlim((x.min(), x.max()))
ax1.set_ylim((y.min(), y.max()))
ax1.set_yticks(np.arange(y.min(), y.max(), 0.2))
ax1.set_aspect(abs(x.max()-x.min())/abs(y.max()-y.min()))
plt.colorbar(gas_interp, ax=ax1)

gas_error = ax2.matshow(s2.T, origin='lower', cmap='YlGn_r',
                   interpolation='bilinear',
                   extent=[x.min(), x.max(), y.min(), y.max()])
ax2.set_title('Residual', fontweight='bold', fontsize=16)
ax2.plot(x, y, '+w')
ax2.set_xlabel('LONGITUDE (°)', color='black', fontweight='bold', fontsize=14)
ax2.set_ylabel('LATITUDE (°)', color='black', fontweight='bold', fontsize=14)
ax2.set_xlim((x.min(), x.max()))
ax2.set_ylim((y.min(), y.max()))
ax2.set_yticks(np.arange(y.min(), y.max(), 0.2))
ax2.set_aspect(abs(x.max()-x.min())/abs(y.max()-y.min()))
plt.colorbar(gas_error, ax=ax2)

fig.suptitle('Kriging Interpolation of Natural Gas\n\n', fontsize=18, fontweight='bold')
plt.show()
plt.tight_layout(rect=[0,0,1,0.95])
fig.savefig('../images/base_ordinary.png')
plt.close()

with open('../model/base_ordinary.pkl', 'wb') as file:
    pickle.dump(kriging, file)