In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import copy as cp
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [2]:
# Import well tops dataframe, columns should be the format - ['Well_Name', 'Surface', 'X', 'Y', 'Z', 'MD', 'TWT_Picked', 'Z_depth', 'Vint', 'TWT_Auto']
tops_columns = ['Well_Name', 'Surface', 'X', 'Y', 'Z', 'MD', 'TWT_Picked', 'Z_depth', 'Vint', 'TWT_Auto']
headers = [i.strip() for i in open("Well_Tops").readlines()[5:33]]
well_master_df = pd.read_csv("Well_Tops", delim_whitespace=True, skiprows =34, names=headers)

WELL_TEST = ["22/9-4", "23/21a-8","23/21-5","22/15-3","23/21-6","22/10b-6","22/10a-4", "23/11-2","22/14b-6X","22/14b-6U",
             "23/6-1","22/14b-6W","22/14b-9", "22/14b-6V","22/14b-6S", "22/15-4", "22/14b-6T","22/14b-9Z","22/14b-6Q",
             "23/16b-9","23/16f-11","22/14b-8","23/21-7Z","22/14b-6Y","22/19b-4","23/16f-12","23/21-7","22/13b-7Z","23/21-1"]

TOPS ='TWT_Auto'

In [3]:
well_master_df.rename(columns={"TWT auto": TOPS, "TWT picked": "TWT_Picked", "Z":"Z_depth"}, inplace=True)
well_master_df["Z_depth"] = well_master_df["Z_depth"] *-1

In [4]:
well_master_df[TOPS] = (well_master_df[TOPS] /1000)*-1

In [5]:
nunique_df = well_master_df.groupby('Surface')['X'].nunique()
well_master_df[['TWT_Picked', 'TWT_Auto']] = well_master_df[['TWT_Picked', 'TWT_Auto']] 
well_master_df["Z_depth"] = well_master_df["Z_depth"]*0.3048
well_master_df.drop(columns=["MD", "X", "Y", "TWT_Picked"], inplace=True)

In [24]:
well_master_df.head()

Unnamed: 0,Z_depth,TWT_Auto,Geological age,Type,Surface,Well,Interpreter,Confidence factor,Dip angle,Dip azimuth,...,Symbol,Locked to fault,"FLOAT,GR_min_Ichron","FLOAT,GR_max_Ichron",Z_depth_T-1,TWT_Auto_T-1,isochron_auto,ischore,Vint_int,mid-point_Z
0,89.916,0.11791,-999,Horizon,Seabed,22/19-2,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.0,0.0,0.058955,89.916,1525.16326,44.958
1,91.7448,0.10312,-999,Horizon,Seabed,22/20-1,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.0,0.0,0.05156,91.7448,1779.379364,45.8724
2,91.1352,0.12713,-999,Horizon,Seabed,22/14a-2,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.0,0.0,0.063565,91.1352,1433.7324,45.5676
3,92.0496,0.11953,-999,Horizon,Seabed,22/9-4,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.0,0.0,0.059765,92.0496,1540.19242,46.0248
4,89.916,0.14256,-999,Horizon,Seabed,22/14a-7,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.0,0.0,0.07128,89.916,1261.447811,44.958


In [7]:
well_master_df[["Z_depth_T-1", 'TWT_Auto_T-1']] = well_master_df.sort_values("Z_depth",ascending=True).groupby("Well")[["Z_depth",'TWT_Auto']].shift(1)

In [8]:
well_master_df["Z_depth_T-1"].fillna(value=0, inplace =True)
well_master_df["TWT_Auto_T-1"].fillna(value=0, inplace =True)

In [9]:
# well_master_df["isochron_picked"] = (well_master_df["TWT_Picked"]*0.5)-(well_master_df["TWT_Picked_T-1"]*0.5)
well_master_df["isochron_auto"] = (well_master_df["TWT_Auto"]*0.5)-(well_master_df["TWT_Auto_T-1"]*0.5)

well_master_df["ischore"] = well_master_df["Z_depth"]-well_master_df["Z_depth_T-1"]

# well_master_df["int_vel_Picked"] = well_master_df["ischore"] / well_master_df["isochron_picked"] 
well_master_df["Vint_int"] = well_master_df["ischore"] / well_master_df["isochron_auto"] 
well_master_df["Vint_int"].fillna(0, inplace=True)

In [10]:
well_master_df["mid-point_Z"] = (well_master_df["ischore"] /2) + well_master_df["Z_depth_T-1"]
df_train_global = well_master_df.loc[~well_master_df["Well"].isin(WELL_TEST)]
df_test_global = well_master_df.loc[well_master_df["Well"].isin(WELL_TEST)]

In [26]:
df_train_global

Unnamed: 0,Z_depth,TWT_Auto,Geological age,Type,Surface,Well,Interpreter,Confidence factor,Dip angle,Dip azimuth,...,Symbol,Locked to fault,"FLOAT,GR_min_Ichron","FLOAT,GR_max_Ichron",Z_depth_T-1,TWT_Auto_T-1,isochron_auto,ischore,Vint_int,mid-point_Z
0,89.916000,0.11791,-999,Horizon,Seabed,22/19-2,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.000000,0.00000,0.058955,89.916000,1525.163260,44.958000
1,91.744800,0.10312,-999,Horizon,Seabed,22/20-1,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.000000,0.00000,0.051560,91.744800,1779.379364,45.872400
2,91.135200,0.12713,-999,Horizon,Seabed,22/14a-2,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.000000,0.00000,0.063565,91.135200,1433.732400,45.567600
3,92.049600,0.11953,-999,Horizon,Seabed,22/9-4,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.000000,0.00000,0.059765,92.049600,1540.192420,46.024800
4,89.916000,0.14256,-999,Horizon,Seabed,22/14a-7,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.000000,0.00000,0.071280,89.916000,1261.447811,44.958000
5,90.830400,0.13540,-999,Horizon,Seabed,22/9-1,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.000000,0.00000,0.067700,90.830400,1341.660266,45.415200
6,90.220800,0.10599,-999,Horizon,Seabed,22/9-2,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.000000,0.00000,0.052995,90.220800,1702.439853,45.110400
7,90.830400,0.10497,-999,Horizon,Seabed,22/8a-2,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.000000,0.00000,0.052485,90.830400,1730.597314,45.415200
8,91.440000,0.09567,-999,Horizon,Seabed,22/8a-3,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.000000,0.00000,0.047835,91.440000,1911.571025,45.720000
9,87.172800,0.09906,-999,Horizon,Seabed,22/9-5,jnasseri,-999,-999,-999,...,0,0,-999.0,-999.0,0.000000,0.00000,0.049530,87.172800,1760.000000,43.586400


In [11]:
df_dummys = pd.get_dummies(well_master_df, columns=["Surface", "Well"], prefix=["IS_SURFACE_", "IS_WELL_"])

In [12]:
df_final = df_dummys

In [13]:
df_final["Wells"] = well_master_df["Well"]
df_train = df_final.loc[~df_final.Wells.isin(WELL_TEST)]
df_test = df_final.loc[df_final.Wells.isin(WELL_TEST)]

In [14]:
dummy_cols = [i for i in df_final.columns if "IS_SURFACE_" in i or "IS_WELL_" in i]

In [15]:
def func_fitting(X_series, y_series, df, regressor = LinearRegression):
#     X_temp = np.array(df[X_series]).reshape(len(is_df_well),1)
#     y_temp = np.array(df[y_series]).reshape(len(is_df_well),1)
    
    regressor_temp = LinearRegression()
    regressor_temp.fit(df[X_series].values, df[y_series].values)
    return regressor_temp

In [17]:
training_cols_TWT_auto = cp.deepcopy(dummy_cols)
training_cols_TWT_auto.append('TWT_Auto')
training_cols_mid_point_Z = cp.deepcopy(dummy_cols)
training_cols_mid_point_Z.append('mid-point_Z')

In [18]:
regressor_t_z_depth = func_fitting(training_cols_TWT_auto, 'Z_depth', df_train)

In [19]:
regressor_mz_depth = func_fitting(training_cols_mid_point_Z, 'Z_depth', df_train)

In [20]:
regressor_mz_vint = func_fitting(training_cols_mid_point_Z, 'Vint_int', df_train)

In [21]:
regressor_t_vint = func_fitting(training_cols_TWT_auto, 'Vint_int', df_train)

In [37]:
df_test_global["pred_tz"] = regressor_t_z_depth.predict(df_test[training_cols_TWT_auto].values)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [40]:
df_test_global["vint_pred"] = regressor_t_vint.predict(df_test[training_cols_TWT_auto].values)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [23]:
df_test_global

Unnamed: 0,Z_depth,TWT_Auto,Geological age,Type,Surface,Well,Interpreter,Confidence factor,Dip angle,Dip azimuth,...,Symbol,Locked to fault,"FLOAT,GR_min_Ichron","FLOAT,GR_max_Ichron",Z_depth_T-1,TWT_Auto_T-1,isochron_auto,ischore,Vint_int,mid-point_Z
