In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import copy as cp
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [2]:
# Import well tops dataframe, columns should be the format - ['Well_Name', 'Surface', 'X', 'Y', 'Z', 'MD', 'TWT_Picked', 'Z_depth', 'Vint', 'TWT_Auto']
tops_columns = ['Well_Name', 'Surface', 'X', 'Y', 'Z', 'MD', 'TWT_Picked', 'Z_depth', 'Vint', 'TWT_Auto']
headers = [i.strip() for i in open("../Well_Tops").readlines()[5:33]]
well_master_df = pd.read_csv("../Well_Tops", delim_whitespace=True, skiprows =34, names=headers)

WELL_TEST = ["22/9-4", "23/21a-8","23/21-5","22/15-3","23/21-6","22/10b-6","22/10a-4", "23/11-2","22/14b-6X","22/14b-6U",
             "23/6-1","22/14b-6W","22/14b-9", "22/14b-6V","22/14b-6S", "22/15-4", "22/14b-6T","22/14b-9Z","22/14b-6Q",
             "23/16b-9","23/16f-11","22/14b-8","23/21-7Z","22/14b-6Y","22/19b-4","23/16f-12","23/21-7","22/13b-7Z","23/21-1"]

TOPS ='TWT_Auto'

In [3]:
well_master_df.rename(columns={"TWT auto": TOPS, "TWT picked": "TWT_Picked", "Z":"Z_depth"}, inplace=True)
well_master_df["Z_depth"] = well_master_df["Z_depth"] * -1

In [4]:
well_master_df[TOPS] = (well_master_df[TOPS] /1000)*-1

In [5]:
nunique_df = well_master_df.groupby('Surface')['X'].nunique()
well_master_df[['TWT_Picked', 'TWT_Auto']] = well_master_df[['TWT_Picked', 'TWT_Auto']] 
well_master_df["Z_depth"] = well_master_df["Z_depth"]*0.3048
well_master_df.drop(columns=["MD", "X", "Y", "TWT_Picked"], inplace=True)

In [6]:
well_master_df[["Z_depth_T-1", 'TWT_Auto_T-1']] = well_master_df.sort_values("Z_depth",ascending=True).groupby("Well")[["Z_depth",'TWT_Auto']].shift(1)

In [7]:
well_master_df["Z_depth_T-1"].fillna(value=0, inplace =True)
well_master_df["TWT_Auto_T-1"].fillna(value=0, inplace =True)

In [8]:
# well_master_df["isochron_picked"] = (well_master_df["TWT_Picked"]*0.5)-(well_master_df["TWT_Picked_T-1"]*0.5)
well_master_df["isochron_auto"] = (well_master_df["TWT_Auto"]*0.5)-(well_master_df["TWT_Auto_T-1"]*0.5)

well_master_df["ischore"] = well_master_df["Z_depth"]-well_master_df["Z_depth_T-1"]

# well_master_df["int_vel_Picked"] = well_master_df["ischore"] / well_master_df["isochron_picked"] 
well_master_df["Vint_int"] = well_master_df["ischore"] / well_master_df["isochron_auto"] 
well_master_df["Vint_int"].fillna(0, inplace=True)

In [9]:
well_master_df["mid-point_Z"] = (well_master_df["ischore"] /2) + well_master_df["Z_depth_T-1"]
df_train_global = well_master_df.loc[~well_master_df["Well"].isin(WELL_TEST)]
df_test_global = well_master_df.loc[well_master_df["Well"].isin(WELL_TEST)]

In [10]:
df_dummys = pd.get_dummies(well_master_df, columns=["Surface"], prefix=["IS_SURFACE_"])

In [11]:
df_final = df_dummys

In [12]:
df_final["Wells"] = well_master_df["Well"]
df_train = df_final.loc[~df_final.Wells.isin(WELL_TEST)]
df_test = df_final.loc[df_final.Wells.isin(WELL_TEST)]

In [13]:
dummy_cols = [i for i in df_final.columns if "IS_SURFACE_" in i or "IS_WELL_" in i]

In [14]:
def func_fitting(X_series, y_series, df, regressor = LinearRegression):
#     X_temp = np.array(df[X_series]).reshape(len(is_df_well),1)
#     y_temp = np.array(df[y_series]).reshape(len(is_df_well),1)
    
    regressor_temp = LinearRegression()
    regressor_temp.fit(df[X_series].values, df[y_series].values)
    return regressor_temp

In [15]:
training_cols_TWT_auto = cp.deepcopy(dummy_cols)
training_cols_TWT_auto.append('TWT_Auto')
training_cols_mid_point_Z = cp.deepcopy(dummy_cols)
training_cols_mid_point_Z.append('mid-point_Z')

In [16]:
regressor_t_z_depth = func_fitting(training_cols_TWT_auto, 'Z_depth', df_train)

In [17]:
# regressor_mz_depth = func_fitting(training_cols_mid_point_Z, 'Z_depth', df_train)

In [18]:
# regressor_mz_vint = func_fitting(training_cols_mid_point_Z, 'Vint_int', df_train)

In [19]:
regressor_t_vint = func_fitting(training_cols_TWT_auto, 'Vint_int', df_train)

In [20]:
df_test_global["TVD_pred"] = regressor_t_z_depth.predict(df_test[training_cols_TWT_auto].values) / 0.3048

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [21]:
if os.path.exists("./data") is False:
    os.makedirs("./data")
df_test_global.rename(columns ={"Z_depth":"TVD"}, inplace=True)
df_test_global["TVD"] = df_test_global.TVD / 0.3048
df_test_global.to_csv("data/test_tZ_prediction.csv")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [22]:
df_test_global2 = df_test_global.drop(columns="TVD_pred").copy()

In [23]:
df_test_global2["pred_isochore"] = regressor_t_vint.predict(df_test[training_cols_TWT_auto].values) * df_test_global2["isochron_auto"]

In [24]:
df_test_global2["TVD_pred"] = df_test_global2.sort_values("TVD", ascending=True).groupby("Well")["pred_isochore"].cumsum()
df_test_global2["TVD_pred"] = df_test_global2["TVD_pred"] / 0.3048

In [30]:
df_test_global.loc[df_test_global2.Well == "22/9-4"].sort_values("TVD")[["Surface","TVD", "TVD_pred"]]

Unnamed: 0,Surface,TVD,TVD_pred
3,Seabed,302.0,319.818925
39,Horda Fm,6393.66,6114.68703
72,Balder Fm,7973.54,7896.052507
84,Balder Tuff,8096.53,8112.030386
122,Sele Fm,8194.53,8111.68318
160,Top Forties,8453.52,8413.247196
217,Lista Fm,8667.5,8613.766303
235,Mey Sst Mb,8700.5,8622.695299
253,Maureen Fm,9536.36,9455.242168
405,TD,9737.32,10809.654344


In [26]:
if os.path.exists("./data") is False:
    os.makedirs("./data")
df_test_global2.to_csv("data/test_vint_prediction.csv")