In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import copy as cp
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [2]:
# Import well tops dataframe, columns should be the format - ['Well_Name', 'Surface', 'X', 'Y', 'Z', 'MD', 'TWT_Picked', 'Z_depth', 'Vint', 'TWT_Auto']
tops_columns = ['Well_Name', 'Surface', 'X', 'Y', 'Z', 'MD', 'TWT_Picked', 'Z_depth', 'Vint', 'TWT_Auto']
headers = [i.strip() for i in open("../Well_Tops").readlines()[5:33]]
well_master_df = pd.read_csv("../Well_Tops", delim_whitespace=True, skiprows =34, names=headers)

WELL_TEST = ["22/9-4", "23/21a-8","23/21-5","22/15-3","23/21-6","22/10b-6","22/10a-4", "23/11-2","22/14b-6X","22/14b-6U",
             "23/6-1","22/14b-6W","22/14b-9", "22/14b-6V","22/14b-6S", "22/15-4", "22/14b-6T","22/14b-9Z","22/14b-6Q",
             "23/16b-9","23/16f-11","22/14b-8","23/21-7Z","22/14b-6Y","22/19b-4","23/16f-12","23/21-7","22/13b-7Z","23/21-1"]

TOPS ='TWT_Auto'

In [3]:
well_master_df.rename(columns={"TWT auto": TOPS, "TWT picked": "TWT_Picked", "Z":"Z_depth"}, inplace=True)
well_master_df["Z_depth"] = well_master_df["Z_depth"] * -1

In [4]:
well_master_df[TOPS] = (well_master_df[TOPS] /1000)*-1

In [5]:
nunique_df = well_master_df.groupby('Surface')['X'].nunique()
well_master_df[['TWT_Picked', 'TWT_Auto']] = well_master_df[['TWT_Picked', 'TWT_Auto']] 
well_master_df["Z_depth"] = well_master_df["Z_depth"]*0.3048
well_master_df.drop(columns=["MD", "X", "Y", "TWT_Picked"], inplace=True)

In [6]:
well_master_df[["Z_depth_T-1", 'TWT_Auto_T-1']] = well_master_df.sort_values("Z_depth",ascending=True).groupby("Well")[["Z_depth",'TWT_Auto']].shift(1)

In [7]:
well_master_df["Z_depth_T-1"].fillna(value=0, inplace =True)
well_master_df["TWT_Auto_T-1"].fillna(value=0, inplace =True)

In [8]:
# well_master_df["isochron_picked"] = (well_master_df["TWT_Picked"]*0.5)-(well_master_df["TWT_Picked_T-1"]*0.5)
well_master_df["isochron_auto"] = (well_master_df["TWT_Auto"]*0.5)-(well_master_df["TWT_Auto_T-1"]*0.5)

well_master_df["ischore"] = well_master_df["Z_depth"]-well_master_df["Z_depth_T-1"]

# well_master_df["int_vel_Picked"] = well_master_df["ischore"] / well_master_df["isochron_picked"] 
well_master_df["Vint_int"] = well_master_df["ischore"] / well_master_df["isochron_auto"] 
well_master_df["Vint_int"].fillna(0, inplace=True)

In [9]:
well_master_df["mid-point_Z"] = (well_master_df["ischore"] /2) + well_master_df["Z_depth_T-1"]
df_train_global = well_master_df.loc[~well_master_df["Well"].isin(WELL_TEST)]
df_test_global = well_master_df.loc[well_master_df["Well"].isin(WELL_TEST)]

In [10]:
df_dummys = pd.get_dummies(well_master_df, columns=["Surface"], prefix=["IS_SURFACE_"])

In [11]:
df_final = df_dummys

In [12]:
df_final["Wells"] = well_master_df["Well"]
df_train = df_final.loc[~df_final.Wells.isin(WELL_TEST)]
df_test = df_final.loc[df_final.Wells.isin(WELL_TEST)]

In [13]:
dummy_cols = [i for i in df_final.columns if "IS_SURFACE_" in i or "IS_WELL_" in i]

In [14]:
def func_fitting(X_series, y_series, df, regressor = LinearRegression):
#     X_temp = np.array(df[X_series]).reshape(len(is_df_well),1)
#     y_temp = np.array(df[y_series]).reshape(len(is_df_well),1)
    
    regressor_temp = LinearRegression()
    regressor_temp.fit(df[X_series].values, df[y_series].values)
    return regressor_temp

In [15]:
training_cols_TWT_auto = cp.deepcopy(dummy_cols)
training_cols_TWT_auto.append('TWT_Auto')
training_cols_mid_point_Z = cp.deepcopy(dummy_cols)
training_cols_mid_point_Z.append('mid-point_Z')

In [16]:
regressor_t_z_depth = func_fitting(training_cols_TWT_auto, 'Z_depth', df_train)

In [17]:
# regressor_mz_depth = func_fitting(training_cols_mid_point_Z, 'Z_depth', df_train)

In [18]:
# regressor_mz_vint = func_fitting(training_cols_mid_point_Z, 'Vint_int', df_train)

In [31]:
regressor_t_vint = func_fitting(training_cols_TWT_auto, 'Vint_int', df_train)

In [18]:
df_test_global["TVD_pred"] = regressor_t_z_depth.predict(df_test[training_cols_TWT_auto].values) / 0.3048

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [28]:
if os.path.exists("./data") is False:
    os.makedirs("./data")
df_test_global.rename(columns ={"Z_depth":"TVD"}, inplace=True)
df_test_global["TVD"] = df_test_global.TVD / 0.3048
df_test_global.to_csv("data/test_tZ_prediction.csv")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [29]:
df_test_global2 = df_test_global.drop(columns="TVD_pred").copy()

In [32]:
df_test_global2["pred_isochore"] = regressor_t_vint.predict(df_test[training_cols_TWT_auto].values) * df_test_global2["isochron_auto"]

In [34]:
df_test_global2["TVD_pred"] = df_test_global2.sort_values("TVD", ascending=True).groupby("Well")["pred_isochore"].cumsum()

In [36]:
df_test_global2.loc[df_test_global2.Well == "22/9-4"].sort_values("TVD")

Unnamed: 0,TVD,TWT_Auto,Geological age,Type,Surface,Well,Interpreter,Confidence factor,Dip angle,Dip azimuth,...,"FLOAT,GR_min_Ichron","FLOAT,GR_max_Ichron",Z_depth_T-1,TWT_Auto_T-1,isochron_auto,ischore,Vint_int,mid-point_Z,pred_isochore,TVD_pred
3,302.0,0.11953,-999,Horizon,Seabed,22/9-4,jnasseri,-999,-999,-999,...,-999.0,-999.0,0.0,0.0,0.059765,92.0496,1540.19242,46.0248,96.172956,96.172956
39,6393.66,2.0656,-999,Horizon,Horda Fm,22/9-4,jnasseri,-999,-999,-999,...,-999.0,-999.0,92.0496,0.11953,0.973035,1856.737968,1908.192375,1020.418584,1946.543399,2042.716355
72,7973.54,2.5123,-999,Horizon,Balder Fm,22/9-4,jnasseri,-999,-999,-999,...,-999.0,-999.0,1948.787568,2.0656,0.22335,481.547424,2156.021598,2189.56128,471.952446,2514.668801
84,8096.53,2.54324,-999,Horizon,Balder Tuff,22/9-4,jnasseri,-999,-999,-999,...,-999.0,-999.0,2430.334992,2.5123,0.01547,37.487352,2423.228959,2449.078668,39.784899,2554.4537
122,8194.53,2.56126,-999,Horizon,Sele Fm,22/9-4,jnasseri,-999,-999,-999,...,-999.0,-999.0,2467.822344,2.54324,0.00901,29.8704,3315.249723,2482.757544,24.880374,2579.334073
160,8453.52,2.62244,-999,Horizon,Top Forties,22/9-4,jnasseri,-999,-999,-999,...,-999.0,-999.0,2497.692744,2.56126,0.03059,78.940152,2580.586858,2537.16282,86.348891,2665.682964
217,8667.5,2.66207,-999,Horizon,Lista Fm,22/9-4,jnasseri,-999,-999,-999,...,-999.0,-999.0,2576.632896,2.62244,0.019815,65.221104,3291.50159,2609.243448,65.072252,2730.755216
235,8700.5,2.66962,-999,Horizon,Mey Sst Mb,22/9-4,jnasseri,-999,-999,-999,...,-999.0,-999.0,2641.854,2.66207,0.003775,10.0584,2664.476821,2646.8832,12.143215,2742.898431
253,9536.36,2.82159,-999,Horizon,Maureen Fm,22/9-4,jnasseri,-999,-999,-999,...,-999.0,-999.0,2651.9124,2.66962,0.075985,254.770128,3352.900283,2779.297464,262.242789,3005.14122
405,9737.32,2.85271,-999,Horizon,TD,22/9-4,jnasseri,-999,-999,-999,...,-999.0,-999.0,2906.682528,2.82159,0.01556,61.252608,3936.542931,2937.308832,63.63498,3068.7762
